################################################################################
# 07_figures.R
# Generate All Figures for JHR Paper
#
# Figures Generated:
#   - Figure 1: Test Difficulty Index Over Time
#   - Figure 2A: Delta TDI vs % Change in Enrollments
#   - Figure 2B: Delta TDI vs % Change in Graduations
#   - Figure 3: Education Major Enrollments Event Study
#   - Figure 4: Teacher Preparation Graduations Event Study
#   - Figure 5A-C: Placebo Event Studies
#   - Figure 6: Log New Teacher Licenses Event Study
#   - Figure 7: Teacher Shortage Areas Event Study
#   - Figure A2: Title II Graduations Event Study
#   - Figure A3: Delta TDI vs % Change in Licenses
################################################################################

library(ggplot2)
library(readr)
library(dplyr)
library(tidyr)
library(stringr)
library(readxl)
library(writexl)
library(ggrepel)
library(scales)
library(glue)
library(haven)

# Output directory
out_dir <- "output/figures"
if (!dir.exists(out_dir)) dir.create(out_dir, recursive = TRUE)

# ──────────────────────────────────────────────────────────────────────────────
# Common Theme for All Plots
# ──────────────────────────────────────────────────────────────────────────────

theme_jhr <- function() {
  theme_minimal(base_size = 12) +
    theme(
      legend.position = "bottom",
      legend.box.background = element_rect(colour = "black"),
      panel.border = element_blank(),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      axis.line.x = element_line(color = "black", linewidth = 0.5),
      axis.line.y = element_line(color = "black", linewidth = 0.5),
      axis.title.x = element_text(size = 14),
      axis.title.y = element_text(size = 16),
      plot.title = element_text(hjust = 0.5, face = "bold"),
      plot.background = element_rect(fill = "white", color = NA),
      panel.background = element_rect(fill = "white", color = NA)
    )
}

save_figure <- function(p_bw, name, width = 10, height = 8, p_color = NULL) {
  # Save B&W versions
  ggsave(file.path(out_dir, paste0(name, ".pdf")), p_bw, width = width, height = height, bg = "white")
  ggsave(file.path(out_dir, paste0(name, ".png")), p_bw, width = width, height = height, dpi = 300, bg = "white")
  # Save color versions
  if (!is.null(p_color)) {
    ggsave(file.path(out_dir, paste0(name, "_color.pdf")), p_color, width = width, height = height, bg = "white")
    ggsave(file.path(out_dir, paste0(name, "_color.png")), p_color, width = width, height = height, dpi = 300, bg = "white")
  }
  cat("  Saved:", name, "(B&W + color)\n")
}

# ──────────────────────────────────────────────────────────────────────────────
# Helper: Process Event Study from Stata XLSX Output
# ──────────────────────────────────────────────────────────────────────────────

process_stata_event_study <- function(file_path, ref_year = 2012) {
  # Try to read as XLSX first
  raw <- tryCatch({
    read_excel(file_path, col_names = FALSE)
  }, error = function(e) {
    # If XLSX read fails, return NULL to trigger XML parsing
    NULL
  })

  # If XLSX read failed, use XML parser
  if (is.null(raw)) {
    return(process_stata_xml_event_study(file_path, ref_year))
  }

  # Find rows with year_ variables
  years <- c()
  estimates <- c()
  ses <- c()

  for (i in 1:nrow(raw)) {
    val <- as.character(raw[i, 1])
    if (!is.na(val) && grepl("^year_\\d{4}$", val)) {
      year <- as.numeric(gsub("year_", "", val))
      coef_str <- as.character(raw[i, 2])
      se_str <- as.character(raw[i + 1, 2])

      # Parse coefficient (remove significance stars)
      coef <- as.numeric(gsub("[*]+$", "", coef_str))
      # Parse SE (remove parentheses)
      se <- as.numeric(gsub("[()]", "", se_str))

      years <- c(years, year)
      estimates <- c(estimates, coef)
      ses <- c(ses, se)
    }
  }

  # Create data frame
  data <- tibble(
    year = years,
    estimate = estimates,
    se = ses
  ) %>%
    mutate(
      conf_low = estimate - 1.96 * se,
      conf_high = estimate + 1.96 * se
    )

  # Add reference year
  data <- data %>%
    add_row(year = ref_year, estimate = 0, se = 0, conf_low = 0, conf_high = 0) %>%
    arrange(year)

  return(data)
}

# Helper: Parse Excel XML format (old .xls files from Stata)
process_stata_xml_event_study <- function(file_path, ref_year = 2012) {
  # Read file as text and parse XML
  library(xml2)
  doc <- read_xml(file_path)

  # Extract all cell data
  cells <- xml_find_all(doc, "//ss:Cell/ss:Data", xml_ns(doc))
  cell_values <- xml_text(cells)

  # Find year_ variables and extract coefficients
  years <- c()
  estimates <- c()
  ses <- c()

  for (i in seq_along(cell_values)) {
    val <- cell_values[i]
    if (grepl("^year_\\d{4}$", val)) {
      year <- as.numeric(gsub("year_", "", val))
      coef_str <- cell_values[i + 1]
      se_str <- cell_values[i + 3]  # Skip empty cell

      coef <- as.numeric(gsub("[*]+$", "", coef_str))
      se <- as.numeric(gsub("[()]", "", se_str))

      if (!is.na(coef) && !is.na(se)) {
        years <- c(years, year)
        estimates <- c(estimates, coef)
        ses <- c(ses, se)
      }
    }
  }

  # Create data frame
  data <- tibble(
    year = years,
    estimate = estimates,
    se = ses
  ) %>%
    mutate(
      conf_low = estimate - 1.96 * se,
      conf_high = estimate + 1.96 * se
    )

  # Add reference year
  data <- data %>%
    add_row(year = ref_year, estimate = 0, se = 0, conf_low = 0, conf_high = 0) %>%
    arrange(year)

  return(data)
}

# ──────────────────────────────────────────────────────────────────────────────
# Helper: Process Event Study CSV (from R fixest output) - LEGACY
# ──────────────────────────────────────────────────────────────────────────────

process_event_study_csv <- function(file_path, ref_year = 2012) {
  # Read the clean CSV format from R output
  data <- read_csv(file_path, show_col_types = FALSE)

  # Rename columns for consistency
  if ("std.error" %in% names(data)) {
    data <- data %>% rename(se = std.error)
  }
  if ("conf.low" %in% names(data)) {
    data <- data %>% rename(conf_low = conf.low, conf_high = conf.high)
  }

  # Return the data, already has reference year with 0s
  data %>% arrange(year)
}

# ──────────────────────────────────────────────────────────────────────────────
# Event Study Plot Function
# ──────────────────────────────────────────────────────────────────────────────

create_event_study_plot <- function(data, y_label, treatment_year = 2013,
                                    shade_start = NULL, shade_label = NULL,
                                    y_limits = c(-0.65, 0.30),
                                    color_mode = "bw") {

  # Color scheme
  if (color_mode == "color") {
    col_point <- "#000080"      # navy
    col_ci    <- "#FF8C00"      # dark orange
    col_zero  <- "#008B00"      # green4
    col_treat <- "red"
    shade_fill  <- "#FFFF99"    # light yellow
    shade_alpha <- 0.6
  } else {
    col_point <- "black"
    col_ci    <- "black"
    col_zero  <- "black"
    col_treat <- "black"
    shade_fill  <- "gray85"
    shade_alpha <- 0.4
  }

  p <- ggplot(data, aes(x = year, y = estimate))

  # Add shading if specified (for graduations with lagged effects)
  if (!is.null(shade_start)) {
    p <- p + annotate("rect",
      xmin = shade_start, xmax = max(data$year) + 0.5,
      ymin = -Inf, ymax = Inf,
      fill = shade_fill, alpha = shade_alpha
    )
  }

  p <- p +
    geom_hline(yintercept = 0, color = col_zero, linewidth = 0.3) +
    geom_vline(xintercept = treatment_year, linetype = "dashed", color = col_treat, linewidth = 0.5) +
    geom_errorbar(aes(ymin = conf_low, ymax = conf_high), color = col_ci, width = 0.2, linewidth = 0.4) +
    geom_point(size = 2.5, color = col_point, fill = col_point, shape = 16) +
    scale_x_continuous(breaks = unique(data$year)) +
    coord_cartesian(ylim = y_limits) +
    labs(x = "Year", y = y_label) +
    theme_jhr() +
    theme(legend.position = "none",
          panel.grid.major.y = element_line(color = "gray90", linewidth = 0.3))

  # Add legend for color version
  if (color_mode == "color") {
    legend_items <- data.frame(
      x = numeric(0), y = numeric(0), label = character(0)
    )
    p <- p +
      geom_point(aes(color = "Estimate"), size = 0, shape = 16) +
      geom_line(aes(color = "Praxis Core replaces PPST"), linewidth = 0, linetype = "dashed") +
      scale_color_manual(
        name = NULL,
        values = c("Estimate" = col_point,
                   "Praxis Core replaces PPST" = col_treat),
        guide = guide_legend(
          override.aes = list(
            shape = c(16, NA),
            linetype = c("blank", "dashed"),
            linewidth = c(0, 0.5),
            size = c(2.5, 0.5)
          )
        )
      ) +
      theme(legend.position = "bottom",
            legend.box.background = element_rect(colour = "black"))

    if (!is.null(shade_start)) {
      # Add shading legend entry via a dummy fill aesthetic
      p <- p +
        annotate("label", x = -Inf, y = -Inf, label = "", fill = shade_fill, alpha = 0) +
        geom_rect(aes(fill = "Expected Lagged Effect Period"),
                  xmin = -Inf, xmax = -Inf, ymin = -Inf, ymax = -Inf,
                  alpha = shade_alpha) +
        scale_fill_manual(name = NULL,
                          values = c("Expected Lagged Effect Period" = shade_fill)) +
        guides(fill = guide_legend(override.aes = list(alpha = shade_alpha)))
    }
  }

  p
}

# ──────────────────────────────────────────────────────────────────────────────
# Figure 1: Test Difficulty Index Over Time
# ──────────────────────────────────────────────────────────────────────────────

if (file.exists("data/cleaned/ets_treatment_data.xlsx")) {
  cat("Creating Figure 1: TDI Over Time...\n")

  tdi_data <- read_excel("data/cleaned/ets_treatment_data.xlsx") %>%
    select(State, year, test_index) %>%
    filter(State != "ND") %>%  # Remove ND
    filter(year <= 2018)

  # Create labels for 2008
  label_states <- tdi_data %>%
    filter(year == 2008) %>%
    mutate(state_label = ifelse(State == "CT", "CT*", State))

  # Deviating states for 2018
  deviating_states <- tdi_data %>%
    filter(year == 2018 & State %in% c("PA", "SC", "ME"))

  # B&W version
  fig1_bw <- ggplot(tdi_data, aes(x = year, y = test_index, group = State)) +
    geom_line() +
    geom_text_repel(
      data = label_states,
      aes(x = year, y = test_index, label = state_label),
      direction = "y", nudge_x = -2, segment.size = 0.1,
      segment.linetype = "dashed", size = 3,
      force_pull = 2, xlim = c(NA, 2007),
      max.overlaps = Inf
    ) +
    geom_text(
      data = deviating_states,
      aes(x = year, y = test_index, label = State),
      vjust = 0, hjust = 0, check_overlap = TRUE
    ) +
    annotate("rect", xmin = 2013, xmax = 2014, ymin = -Inf, ymax = Inf,
             fill = "gray85", alpha = 0.3) +
    annotate("text", x = 2014.3, y = -1.45, label = "Praxis Core replaces PPST",
             color = "black", size = 5, fontface = "italic") +
    labs(x = "Academic Year", y = "Test Difficulty Index (TDI)") +
    scale_x_continuous(breaks = c(2008, 2010, 2012, 2014, 2016, 2018)) +
    scale_y_continuous(limits = c(-1.55, 0)) +
    theme_jhr() +
    theme(legend.position = "none")

  # Color version
  fig1_color <- ggplot(tdi_data, aes(x = year, y = test_index, group = State)) +
    geom_line() +
    geom_text_repel(
      data = label_states,
      aes(x = year, y = test_index, label = state_label),
      direction = "y", nudge_x = -2, segment.size = 0.1,
      segment.linetype = "dashed", size = 3,
      force_pull = 2, xlim = c(NA, 2007),
      max.overlaps = Inf
    ) +
    geom_text(
      data = deviating_states,
      aes(x = year, y = test_index, label = State),
      vjust = 0, hjust = 0, check_overlap = TRUE
    ) +
    annotate("rect", xmin = 2013, xmax = 2014, ymin = -Inf, ymax = Inf,
             fill = "#FFFF99", alpha = 0.6) +
    annotate("text", x = 2014.3, y = -1.45, label = "Praxis Core replaces PPST,\nFall 2013 - Spring 2014",
             color = "red", size = 5, fontface = "italic") +
    labs(x = "Academic Year", y = "Test Difficulty Index (TDI)") +
    scale_x_continuous(breaks = c(2008, 2010, 2012, 2014, 2016, 2018)) +
    scale_y_continuous(limits = c(-1.55, 0)) +
    theme_jhr() +
    theme(legend.position = "none")

  save_figure(fig1_bw, "figure_1_tdi_over_time", p_color = fig1_color)
}

# ──────────────────────────────────────────────────────────────────────────────
# Figures 2A & 2B: Scatter Plots (Delta TDI vs Outcomes)
# ──────────────────────────────────────────────────────────────────────────────

if (file.exists("data/cleaned/enrollment_event_data.xlsx") &&
    file.exists("data/cleaned/graduation_event_data.xlsx")) {

  cat("Creating Figures 2A & 2B: Scatter Plots...\n")

  # Load data
  enrollment_data <- read_excel("data/cleaned/enrollment_event_data.xlsx") %>%
    select(year, eftotlt, State, continuous_treat) %>%
    group_by(State, year) %>%
    summarise(eftotlt = sum(eftotlt, na.rm = TRUE),
              continuous_treat = first(continuous_treat), .groups = "drop")

  graduation_data <- read_excel("data/cleaned/graduation_event_data.xlsx") %>%
    select(year, ctotalt, State, continuous_treat) %>%
    group_by(State, year) %>%
    summarise(ctotalt = sum(ctotalt, na.rm = TRUE),
              continuous_treat = first(continuous_treat), .groups = "drop")

  # Calculate % change 2012 to 2016
  treat_by_state <- graduation_data %>% distinct(State, continuous_treat)

  enrl_pct <- enrollment_data %>%
    filter(year %in% c(2012, 2016)) %>%
    pivot_wider(names_from = year, values_from = eftotlt) %>%
    mutate(pct_change = (`2016` - `2012`) / `2012` * 100) %>%
    select(State, pct_change)

  grad_pct <- graduation_data %>%
    filter(year %in% c(2012, 2016)) %>%
    pivot_wider(names_from = year, values_from = ctotalt) %>%
    mutate(pct_change = (`2016` - `2012`) / `2012` * 100) %>%
    select(State, pct_change)

  # Figure 2A: Enrollments
  df_enrl <- treat_by_state %>% inner_join(enrl_pct, by = "State")

  fig2a_bw <- ggplot(df_enrl, aes(x = continuous_treat, y = pct_change)) +
    geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "black", linewidth = 0.8) +
    geom_point(size = 2, color = "black") +
    geom_text_repel(aes(label = State), size = 3, max.overlaps = Inf, color = "black") +
    scale_x_continuous(name = "Change in Test Difficulty Index (2012-2014)") +
    scale_y_continuous(name = str_wrap("% Change in Enrollments (2012-2016)", 35),
                       labels = label_percent(scale = 1)) +
    theme_jhr() +
    theme(legend.position = "none", panel.grid.major.y = element_line(linetype = "dotted", color = "gray80"))

  fig2a_color <- ggplot(df_enrl, aes(x = continuous_treat, y = pct_change)) +
    geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "black", linewidth = 0.8) +
    geom_point(size = 2, color = "#000080") +
    geom_text_repel(aes(label = State), size = 3, max.overlaps = Inf, color = "black") +
    scale_x_continuous(name = "Change in Test Difficulty Index (2012-2014)") +
    scale_y_continuous(name = str_wrap("% Change in Enrollments (2012-2016)", 35),
                       labels = label_percent(scale = 1)) +
    theme_jhr() +
    theme(legend.position = "none", panel.grid.major.y = element_line(linetype = "dotted", color = "gray80"))

  save_figure(fig2a_bw, "figure_2a_scatter_enrollments", width = 6, height = 4, p_color = fig2a_color)

  # Figure 2B: Graduations
  df_grad <- treat_by_state %>% inner_join(grad_pct, by = "State")

  fig2b_bw <- ggplot(df_grad, aes(x = continuous_treat, y = pct_change)) +
    geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "black", linewidth = 0.8) +
    geom_point(size = 2, color = "black") +
    geom_text_repel(aes(label = State), size = 3, max.overlaps = Inf, color = "black") +
    scale_x_continuous(name = "Change in Test Difficulty Index (2012-2014)") +
    scale_y_continuous(name = str_wrap("% Change in Graduations (2012-2016)", 35),
                       labels = label_percent(scale = 1)) +
    theme_jhr() +
    theme(legend.position = "none", panel.grid.major.y = element_line(linetype = "dotted", color = "gray80"))

  fig2b_color <- ggplot(df_grad, aes(x = continuous_treat, y = pct_change)) +
    geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "black", linewidth = 0.8) +
    geom_point(size = 2, color = "#000080") +
    geom_text_repel(aes(label = State), size = 3, max.overlaps = Inf, color = "black") +
    scale_x_continuous(name = "Change in Test Difficulty Index (2012-2014)") +
    scale_y_continuous(name = str_wrap("% Change in Graduations (2012-2016)", 35),
                       labels = label_percent(scale = 1)) +
    theme_jhr() +
    theme(legend.position = "none", panel.grid.major.y = element_line(linetype = "dotted", color = "gray80"))

  save_figure(fig2b_bw, "figure_2b_scatter_graduations", width = 6, height = 4, p_color = fig2b_color)
}

# ──────────────────────────────────────────────────────────────────────────────
# Figures 3 & 4: Main Event Studies (from Stata outputs)
# ──────────────────────────────────────────────────────────────────────────────

# Use R-generated CSV (from 06_main_regressions.R with bootstrap SEs)
if (file.exists("output/tables/composite_enrollments_event_study_total.csv")) {
  cat("Creating Figures 3 & 4: Event Studies (from R bootstrap outputs)...\n")

  enroll_data <- process_event_study_csv("output/tables/composite_enrollments_event_study_total.csv")
  fig3_bw <- create_event_study_plot(enroll_data, "Log Enrollment Estimate")
  fig3_color <- create_event_study_plot(enroll_data, "Log Enrollment Estimate", color_mode = "color")
  save_figure(fig3_bw, "figure_3_enrollments_event_study", p_color = fig3_color)

  grad_data <- process_event_study_csv("output/tables/composite_graduations_event_study_total.csv")
  fig4_bw <- create_event_study_plot(grad_data, "Log Graduation Estimate",
                                      shade_start = 2015)
  fig4_color <- create_event_study_plot(grad_data, "Log Graduation Estimate",
                                         shade_start = 2015, color_mode = "color")
  save_figure(fig4_bw, "figure_4_graduations_event_study", p_color = fig4_color)

# Fallback to Stata XLSX if R CSV not available
} else if (file.exists("output/tables/composite_enrollments_event_study_total.xlsx")) {
  cat("Creating Figures 3 & 4: Event Studies (from Stata outputs)...\n")

  enroll_data <- process_stata_event_study("output/tables/composite_enrollments_event_study_total.xlsx")
  fig3_bw <- create_event_study_plot(enroll_data, "Log Enrollment Estimate")
  fig3_color <- create_event_study_plot(enroll_data, "Log Enrollment Estimate", color_mode = "color")
  save_figure(fig3_bw, "figure_3_enrollments_event_study", p_color = fig3_color)

  grad_data <- process_stata_event_study("output/tables/composite_graduations_event_study_total.xlsx")
  fig4_bw <- create_event_study_plot(grad_data, "Log Graduation Estimate",
                                      shade_start = 2015)
  fig4_color <- create_event_study_plot(grad_data, "Log Graduation Estimate",
                                         shade_start = 2015, color_mode = "color")
  save_figure(fig4_bw, "figure_4_graduations_event_study", p_color = fig4_color)
}

# ──────────────────────────────────────────────────────────────────────────────
# Helper: Process Regression Table CSV (Stata output format)
# ──────────────────────────────────────────────────────────────────────────────

process_placebo_csv <- function(file_path, column = 2, ref_year = 2012) {
  # Read the CSV - it's in regression table format with coefficients and SEs
  lines <- readLines(file_path, warn = FALSE)

  years <- c()
  estimates <- c()
  ses <- c()

  i <- 1
  while (i <= length(lines)) {
    line <- lines[i]
    # Look for year_ pattern
    if (grepl("^year_\\d{4}", line)) {
      parts <- strsplit(line, ",")[[1]]
      year <- as.numeric(gsub("year_", "", parts[1]))

      # Extract coefficient from the specified column (remove significance stars)
      coef_str <- parts[column]
      coef <- as.numeric(gsub("[*]+$", "", coef_str))

      # Next line should have SE in parentheses
      if (i + 1 <= length(lines)) {
        se_line <- lines[i + 1]
        se_parts <- strsplit(se_line, ",")[[1]]
        se_str <- se_parts[column]
        se <- as.numeric(gsub("[()]", "", se_str))

        if (!is.na(coef) && !is.na(se)) {
          years <- c(years, year)
          estimates <- c(estimates, coef)
          ses <- c(ses, se)
        }
      }
      i <- i + 2
    } else {
      i <- i + 1
    }
  }

  # Create data frame
  data <- tibble(
    year = years,
    estimate = estimates,
    se = ses
  ) %>%
    mutate(
      conf_low = estimate - 1.96 * se,
      conf_high = estimate + 1.96 * se
    )

  # Add reference year if not already present
  if (!(ref_year %in% data$year)) {
    data <- data %>%
      add_row(year = ref_year, estimate = 0, se = 0, conf_low = 0, conf_high = 0)
  }

  data <- data %>% arrange(year)

  return(data)
}

# ──────────────────────────────────────────────────────────────────────────────
# Figure 6: License Event Study (from Stata outputs)
# ──────────────────────────────────────────────────────────────────────────────

# License data: R CSV (standard format) → Stata CSV → Stata XLS fallback
if (file.exists("output/tables/state_licenses_event_study_coefficients.csv")) {
  cat("Creating Figure 6: License Event Study (from CSV)...\n")

  # Check if R-generated (has std.error header) or Stata format
  test_header <- readLines("output/tables/state_licenses_event_study_coefficients.csv", n = 1)
  if (grepl("std.error", test_header)) {
    license_data <- process_event_study_csv(
      "output/tables/state_licenses_event_study_coefficients.csv"
    )
    cat("  Using R-generated data\n")
  } else {
    license_data <- process_placebo_csv(
      "output/tables/state_licenses_event_study_coefficients.csv",
      column = 2, ref_year = 2012
    )
    cat("  Using Stata-generated data\n")
  }

  fig6_bw <- create_event_study_plot(license_data, "Log License Estimate",
                                     shade_start = 2015, y_limits = c(-1.0, 0.50))
  fig6_color <- create_event_study_plot(license_data, "Log License Estimate",
                                         shade_start = 2015, y_limits = c(-1.0, 0.50), color_mode = "color")
  save_figure(fig6_bw, "figure_6_licenses_event_study", p_color = fig6_color)
} else if (file.exists("output/tables/state_licenses_event_study_coefficients.xls")) {
  cat("Creating Figure 6: License Event Study (from Stata XLS)...\n")

  license_data <- process_stata_event_study("output/tables/state_licenses_event_study_coefficients.xls")
  fig6_bw <- create_event_study_plot(license_data, "Log License Estimate",
                                      shade_start = 2015, y_limits = c(-1.0, 0.50))
  fig6_color <- create_event_study_plot(license_data, "Log License Estimate",
                                         shade_start = 2015, y_limits = c(-1.0, 0.50), color_mode = "color")
  save_figure(fig6_bw, "figure_6_licenses_event_study", p_color = fig6_color)
}

# ──────────────────────────────────────────────────────────────────────────────
# Figure 7: Teacher Shortage Event Study (from Stata outputs)
# ──────────────────────────────────────────────────────────────────────────────

# Shortage data: R CSV (standard format) → Stata CSV → Stata XLS fallback
if (file.exists("output/tables/teacher_shortage_event_study.csv")) {
  cat("Creating Figure 7: Shortage Event Study (from CSV)...\n")

  # Check if R-generated (has std.error header) or Stata format
  test_header <- readLines("output/tables/teacher_shortage_event_study.csv", n = 1)
  if (grepl("std.error", test_header)) {
    shortage_data <- process_event_study_csv(
      "output/tables/teacher_shortage_event_study.csv"
    )
    cat("  Using R-generated data\n")
  } else {
    shortage_data <- process_placebo_csv(
      "output/tables/teacher_shortage_event_study.csv",
      column = 4, ref_year = 2012
    )
    cat("  Using Stata-generated data\n")
  }

  fig7_bw <- create_event_study_plot(shortage_data, "Log Teacher Shortage Estimate",
                                      shade_start = 2015, y_limits = c(-1.0, 2.1))
  fig7_color <- create_event_study_plot(shortage_data, "Log Teacher Shortage Estimate",
                                         shade_start = 2015, y_limits = c(-1.0, 2.1), color_mode = "color")
  save_figure(fig7_bw, "figure_7_shortages_event_study", p_color = fig7_color)
} else if (file.exists("output/tables/teacher_shortage_event_study.xls")) {
  cat("Creating Figure 7: Shortage Event Study (from Stata XLS)...\n")

  shortage_data <- process_stata_event_study("output/tables/teacher_shortage_event_study.xls")
  fig7_bw <- create_event_study_plot(shortage_data, "Log Teacher Shortage Estimate",
                                      shade_start = 2015, y_limits = c(-1.0, 2.1))
  fig7_color <- create_event_study_plot(shortage_data, "Log Teacher Shortage Estimate",
                                         shade_start = 2015, y_limits = c(-1.0, 2.1), color_mode = "color")
  save_figure(fig7_bw, "figure_7_shortages_event_study", p_color = fig7_color)
}

# ──────────────────────────────────────────────────────────────────────────────
# Figures 5A-C: Placebo Event Studies
# ──────────────────────────────────────────────────────────────────────────────

# Figure 5A: Non-Education Major Enrollments (Placebo)
# Priority: R CSV (from 05_secondary_regressions.R) → Stata CSV fallback
if (file.exists("output/tables/enrollments_placebo_event_study_total.csv")) {
  cat("Creating Figure 5A: Non-Education Enrollments Placebo...\n")

  # Check if this is R-generated CSV (has std.error column) or Stata format
  test_header <- readLines("output/tables/enrollments_placebo_event_study_total.csv", n = 1)
  if (grepl("std.error", test_header)) {
    enroll_placebo_data <- process_event_study_csv(
      "output/tables/enrollments_placebo_event_study_total.csv"
    )
    cat("  Using R-generated data\n")
  } else {
    enroll_placebo_data <- process_placebo_csv(
      "output/tables/enrollments_placebo_event_study_total.csv",
      column = 2, ref_year = 2012
    )
    cat("  Using Stata-generated data\n")
  }

  fig5a_bw <- create_event_study_plot(
    enroll_placebo_data,
    "Log Non-Education Enrollment Estimate",
    treatment_year = 2013,
    y_limits = c(-0.40, 0.30)
  )
  fig5a_color <- create_event_study_plot(
    enroll_placebo_data,
    "Log Non-Education Enrollment Estimate",
    treatment_year = 2013,
    y_limits = c(-0.40, 0.30),
    color_mode = "color"
  )

  save_figure(fig5a_bw, "figure_5a_placebo_enrollments", p_color = fig5a_color)
}

# Figure 5B: Non-Education Major Graduations (Placebo)
# Priority: R individual CSV → Stata combined CSV fallback
if (file.exists("output/tables/placebo_non_ed_completions.csv")) {
  cat("Creating Figure 5B: Non-Education Graduations Placebo (from R data)...\n")
  grad_placebo_data <- process_event_study_csv("output/tables/placebo_non_ed_completions.csv")

  fig5b_bw <- create_event_study_plot(
    grad_placebo_data,
    "Log Completion Estimate",
    treatment_year = 2013,
    shade_start = 2015,
    y_limits = c(-0.20, 0.30)
  )
  fig5b_color <- create_event_study_plot(
    grad_placebo_data,
    "Log Completion Estimate",
    treatment_year = 2013,
    shade_start = 2015,
    y_limits = c(-0.20, 0.30),
    color_mode = "color"
  )
  save_figure(fig5b_bw, "figure_5b_placebo_graduations", p_color = fig5b_color)

} else if (file.exists("output/tables/placebo_event_study_total.csv")) {
  cat("Creating Figure 5B: Non-Education Graduations Placebo (from Stata data)...\n")
  grad_placebo_data <- process_placebo_csv(
    "output/tables/placebo_event_study_total.csv", column = 3, ref_year = 2012
  )

  fig5b_bw <- create_event_study_plot(
    grad_placebo_data,
    "Log Completion Estimate",
    treatment_year = 2013,
    shade_start = 2015,
    y_limits = c(-0.20, 0.30)
  )
  fig5b_color <- create_event_study_plot(
    grad_placebo_data,
    "Log Completion Estimate",
    treatment_year = 2013,
    shade_start = 2015,
    y_limits = c(-0.20, 0.30),
    color_mode = "color"
  )
  save_figure(fig5b_bw, "figure_5b_placebo_graduations", p_color = fig5b_color)
}

# Figure 5C: Other Education Graduations (Placebo)
# Priority: R individual CSV → Stata combined CSV fallback
if (file.exists("output/tables/placebo_other_ed_completions.csv")) {
  cat("Creating Figure 5C: Other Education Graduations Placebo (from R data)...\n")
  other_ed_data <- process_event_study_csv("output/tables/placebo_other_ed_completions.csv")

  fig5c_bw <- create_event_study_plot(
    other_ed_data,
    "Log Completion Estimate",
    treatment_year = 2013,
    shade_start = 2015,
    y_limits = c(-0.35, 0.55)
  )
  fig5c_color <- create_event_study_plot(
    other_ed_data,
    "Log Completion Estimate",
    treatment_year = 2013,
    shade_start = 2015,
    y_limits = c(-0.35, 0.55),
    color_mode = "color"
  )
  save_figure(fig5c_bw, "figure_5c_other_education_graduations", p_color = fig5c_color)

} else if (file.exists("output/tables/placebo_event_study_total.csv")) {
  cat("Creating Figure 5C: Other Education Graduations Placebo (from Stata data)...\n")
  other_ed_data <- process_placebo_csv(
    "output/tables/placebo_event_study_total.csv", column = 2, ref_year = 2012
  )

  fig5c_bw <- create_event_study_plot(
    other_ed_data,
    "Log Completion Estimate",
    treatment_year = 2013,
    shade_start = 2015,
    y_limits = c(-0.35, 0.55)
  )
  fig5c_color <- create_event_study_plot(
    other_ed_data,
    "Log Completion Estimate",
    treatment_year = 2013,
    shade_start = 2015,
    y_limits = c(-0.35, 0.55),
    color_mode = "color"
  )
  save_figure(fig5c_bw, "figure_5c_other_education_graduations", p_color = fig5c_color)
}

# ──────────────────────────────────────────────────────────────────────────────
# Figure A2: Title II Graduations Event Study
# ──────────────────────────────────────────────────────────────────────────────

titleII_grad_file <- "data/raw/title_ii_results/title_II_graduations_event_study.xlsx"

if (file.exists(titleII_grad_file)) {
  cat("Creating Figure A2: Title II Graduations Event Study...\n")

  # Read the Title II graduations event study results
  raw <- read_excel(titleII_grad_file, col_names = FALSE)

  # Extract year coefficients and standard errors
  years <- c()
  estimates <- c()
  ses <- c()

  for (i in 1:nrow(raw)) {
    val <- as.character(raw[i, 1])
    if (!is.na(val) && grepl("^year_\\d{4}$", val)) {
      year <- as.numeric(gsub("year_", "", val))
      coef_str <- as.character(raw[i, 2])
      se_str <- as.character(raw[i + 1, 2])

      # Parse coefficient (remove significance stars)
      coef <- as.numeric(gsub("[*]+$", "", coef_str))
      # Parse SE (remove parentheses)
      se <- as.numeric(gsub("[()]", "", se_str))

      if (!is.na(coef) && !is.na(se)) {
        years <- c(years, year)
        estimates <- c(estimates, coef)
        ses <- c(ses, se)
      }
    }
  }

  # Create data frame with confidence intervals
  titleII_grad_data <- tibble(
    year = years,
    estimate = estimates,
    se = ses
  ) %>%
    mutate(
      conf_low = estimate - 1.96 * se,
      conf_high = estimate + 1.96 * se
    )

  # Add reference year (2012)
  titleII_grad_data <- titleII_grad_data %>%
    add_row(year = 2012, estimate = 0, se = 0, conf_low = 0, conf_high = 0) %>%
    arrange(year)

  # Create event study plots (B&W + color)
  fig_a2_bw <- create_event_study_plot(titleII_grad_data, "Log Graduation Estimate",
                                        shade_start = 2015, y_limits = c(-1.0, 0.5))
  fig_a2_color <- create_event_study_plot(titleII_grad_data, "Log Graduation Estimate",
                                           shade_start = 2015, y_limits = c(-1.0, 0.5), color_mode = "color")

  save_figure(fig_a2_bw, "figure_a2_titleII_graduations", p_color = fig_a2_color)
}

# ──────────────────────────────────────────────────────────────────────────────
# Figure A3: Delta TDI vs % Change in New Teacher Licenses (Scatter Plot)
# ──────────────────────────────────────────────────────────────────────────────

# Try to load license data from JHR data folder
license_data_path <- "data/raw/licenses/state_data_clean.xlsx"

if (file.exists(license_data_path)) {
  cat("Creating Figure A3: Scatter Plot - Delta TDI vs Licenses...\n")

  # Load license data
  license_raw <- read_excel(license_data_path) %>%
    select(State, year, licenses, continuous_treat) %>%
    filter(!is.na(licenses))

  # Calculate % change in licenses from 2012 to 2016
  license_pct <- license_raw %>%
    filter(year %in% c(2012, 2016)) %>%
    tidyr::pivot_wider(names_from = year, values_from = licenses, names_prefix = "lic_") %>%
    filter(!is.na(lic_2012) & !is.na(lic_2016)) %>%
    mutate(pct_change = (lic_2016 - lic_2012) / lic_2012 * 100) %>%
    select(State, continuous_treat, pct_change)

  # Create scatter plots (B&W + color)
  fig_a3_bw <- ggplot(license_pct, aes(x = continuous_treat, y = pct_change)) +
    geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "black", linewidth = 0.8) +
    geom_point(size = 2, color = "black") +
    geom_text_repel(aes(label = State), size = 3, max.overlaps = Inf, color = "black") +
    scale_x_continuous(name = "Change in Test Difficulty Index (2012-2014)") +
    scale_y_continuous(name = str_wrap("% Change in New Teacher Licenses (2012-2016)", 35),
                       labels = label_percent(scale = 1)) +
    theme_jhr() +
    theme(legend.position = "none", panel.grid.major.y = element_line(linetype = "dotted", color = "gray80"))

  fig_a3_color <- ggplot(license_pct, aes(x = continuous_treat, y = pct_change)) +
    geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "black", linewidth = 0.8) +
    geom_point(size = 2, color = "#000080") +
    geom_text_repel(aes(label = State), size = 3, max.overlaps = Inf, color = "black") +
    scale_x_continuous(name = "Change in Test Difficulty Index (2012-2014)") +
    scale_y_continuous(name = str_wrap("% Change in New Teacher Licenses (2012-2016)", 35),
                       labels = label_percent(scale = 1)) +
    theme_jhr() +
    theme(legend.position = "none", panel.grid.major.y = element_line(linetype = "dotted", color = "gray80"))

  save_figure(fig_a3_bw, "figure_a3_scatter_licenses", width = 6, height = 4, p_color = fig_a3_color)

} else {
  cat("License data not found at:", license_data_path, "\n")
}

cat("\nAll figures generated in:", out_dir, "\n")
